*****************************
** MULTINOMIAL REGRESSIONS **
*****************************
* Labussiere, Levels and Vink (2021)

* This script needs the following inputs:
	* OMTR_clusters.dta (From clustering.do)

* This script produces the following outputs:
	* FIGURES 3 4 
	* TABLES S10 S11 S12 S13 S14 S15 S16
	
	
use OMTR_clusters.dta, clear
order rinpersoon year, first
drop id time
drop ward_ord ward_hgt


******************
* Cluster labels *
******************

label define pam_ward ///
1	"Downward movers" ///
2	"Discontinued leavers" ///  labelled as "Discontinuous" in the manuscript
3	"U-turners" /// 			labelled as "Detour" in the manuscript
4	"Dropouts" ///
5	"Upward movers" ///
6	"Standard movers", replace
label values pam_ward_R6 pam_ward


******************
* Missing values *
******************

gen missing_SES = missing(nbr_children_VO1) | missing(single_parent_VO1) | missing(homeowner_alo1)
gen missing_income = (q3_st_disp_income1 == 4)
gen missing_SECM = (secm_ma_agg2 == 4 | secm_pa_agg2 == 4)
gen missing_YSM_ma_wimp = missing(YSM_ma_wimp_cat3)
gen missing_YSM_pa_wimp = missing(YSM_pa_wimp_cat3)

* Exclude those with missing parental socioeconomic status or income
keep if !missing_SES & !missing_income
* N= 12,249 


***************************
* Multinomial regressions *
***************************

******************************************************************************
* TABLE S10
* Comparison of multinomial regression results between the four stepwise models
******************************************************************************
	
* Model 1 - main variable of interest

 	set more off
	mlogit pam_ward_R6 ib2.citizenship /// 
					   , b(6) rrr 
	estimates store model_1
		
	
* Model 2 - Model 1 + sociodemographic characteristics

	set more off
	mlogit pam_ward_R6 ib2.citizenship              /// 
					   i.gender                     ///
					   i.first_born                 ///
					   i.homeowner_alo1             ///
					   i.EU_nonEU                ///
					   nbr_children_VO1             ///
					   i.single_parent_VO1          ///
					   , b(6) rrr  
	estimates store model_2
			
	
* Model 3 - Model 2 + educational characteristics

	set more off
	mlogit pam_ward_R6 ib2.citizenship              /// 
					   i.gender                     ///
					   i.first_born                 ///
					   i.homeowner_alo1             ///
					   i.EU_nonEU                ///
					   nbr_children_VO1             ///
					   i.single_parent_VO1          ///
					   i.start_group                ///
					   i.q5_citoscore               ///
					   , b(6) rrr  
	estimates store model_3
		

* Model 4 - Model 3 + confounders

******************************************************************************
* TABLE S11
* Multinomial regression results for the full model
******************************************************************************

	set more off
	mlogit pam_ward_R6 ib2.citizenship              /// 
					   i.gender                     ///
					   i.first_born					///
					   i.homeowner_alo1             ///
					   i.EU_nonEU                ///
					   nbr_children_VO1             ///
					   i.single_parent_VO1          ///
					   i.start_group                ///
					   i.q5_citoscore               ///
					   i.parent_ed_level_miss       ///
					   i.secm_ma_agg2       		///
					   i.secm_pa_agg2       		///
					   ib1.q3_st_disp_income1       ///
					   i.Dutch_home					///
					   i.YSM_ma_cat3                ///
					   i.YSM_pa_cat3                ///
					   , b(6) rrr 
	estimates store model_4_ysm_bo
	
	
	
*************
* COEFPLOTS *
*************

******************************************************************************
* FIGURE 3
* Coefplot comparing all models for the citizenship coefficient only
******************************************************************************

	coefplot (model_1, keep(Upward_movers:*.citizenship) label(Model 1) ///
			  mcolor("221 170 51") ciopts(lcolor("225 182 81" "240 218 168"))) ///
			 (model_2, keep(Upward_movers:*.citizenship) label(Model 2) ///
			  mcolor("187 85 102") ciopts(lcolor("164 65 82" "197 109 124"))) ///
			 (model_3, keep(Upward_movers:*.citizenship) label(Model 3) ///
			  mcolor("0 68 136") ciopts(lcolor("0 77 153" "0 102 209"))) ///
			 (model_4_ysm_bo, keep(Upward_movers:*.citizenship) label(Model 4) ///
			 mcolor("0 0 0") ciopts(lcolor("64 64 64" "128 128 128"))) ///
									, bylabel(Upward vs. Standard) xscale(r(0.95 1.4)) || ///
			 (model_1, keep(Downward_movers:*.citizenship))  ///
			 (model_2, keep(Downward_movers:*.citizenship)) ///
			 (model_3, keep(Downward_movers:*.citizenship)) ///
			 (model_4_ysm_bo, keep(Downward_movers:*.citizenship)) ///
									, bylabel(Downward vs. Standard) xscale(r(-0.7 1.4)) || ///
			 (model_1, keep(U_turners:*.citizenship)) ///
			 (model_2, keep(U_turners:*.citizenship)) ///
			 (model_3, keep(U_turners:*.citizenship)) ///
			 (model_4_ysm_bo, keep(U_turners:*.citizenship)) ///
									, bylabel(Detour vs. Standard) xscale(r(1 2.5)) || ///
			 (model_1, keep(Dropouts:*.citizenship)) ///
			 (model_2, keep(Dropouts:*.citizenship)) ///
			 (model_3, keep(Dropouts:*.citizenship)) ///
			 (model_4_ysm_bo, keep(Dropouts:*.citizenship)) ///
									, bylabel(Dropout vs. Standard) xscale(r(0.65 1.1)) || ///
			 (model_1, keep(Discontinued_leavers:*.citizenship)) ///
			 (model_2, keep(Discontinued_leavers:*.citizenship)) ///
			 (model_3, keep(Discontinued_leavers:*.citizenship)) ///
			 (model_4_ysm_bo, keep(Discontinued_leavers:*.citizenship)) ///
									, bylabel(Discontinued vs. Standard) xscale(r(0.65 1.1))  ///
			 xline(1) legend(region(lcolor(white)) size(small) row(1)) ///
			 xtitle("Odds-ratio (log scale)", height(5))  ///
			 eform omitted baselevels drop(_cons) xscale(log)  ///
			 msymbol(d) msize(small) levels(95 90) ciopts(lwidth(medium .)) ///
			 headings(0.citizenship = "{bf: Citizenship}", labsize(small)) ///
			 ylabel(,labsize(small)) ///
			 byopts(xrescale col(2) graphregion(fcolor(white))) ///
			 bgcolor(white) /// 
			 subtitle(,size(medsmall) margin(small) bcolor(white)) /// 
			 xsize(10) ysize(9)									

			 
******************************************************************************
* FIGURE 4
* Coefplot of average marginal effects of Dutch citizenship 
******************************************************************************	

	mlogit pam_ward_R6 ib2.citizenship              /// 
					   i.gender                     ///
					   i.first_born					///
					   i.homeowner_alo1             ///
					   i.EU_nonEU                ///
					   nbr_children_VO1             ///
					   i.single_parent_VO1          ///
					   i.start_group                ///
					   i.q5_citoscore               ///
					   i.parent_ed_level_miss       ///
					   i.secm_ma_agg2       		///
					   i.secm_pa_agg2       		///
					   ib1.q3_st_disp_income1       ///
					   i.Dutch_home					///
					   i.YSM_ma_cat3                ///
					   i.YSM_pa_cat3                ///
					   , b(6) rrr  

	margins, dydx(citizenship) post
	
	coefplot (, keep(*:1._predict) label(Downward) mcolor("27 158 119") ///
	ciopts(lcolor("27 158 119" "27 158 119")) msymbol(d)) ///
			 (, keep(*:2._predict) label(Discontinued) mcolor("186 186 186") ///
	ciopts(lcolor("186 186 186" "186 186 186")) msymbol(t)) ///
			 (, keep(*:3._predict) label(Detour) mcolor("187 85 102") ///
	ciopts(lcolor("187 85 102" "187 85 102")) msymbol(s)) ///
			 (, keep(*:4._predict) label(Dropout) mcolor("0 68 136") ///
	ciopts(lcolor("0 68 136" "0 68 136")) msymbol(x)) ///
			 (, keep(*:5._predict) label(Upward) mcolor("0 0 0") ///
	ciopts(lcolor("0 0 0" "0 0 0")) msymbol(Th)) ///
			 (, keep(*:6._predict) label(Standard) mcolor("221 170 51") ///
	ciopts(lcolor("221 170 51" "221 170 51"))) ///
			 , swapnames xline(0) graphregion(color(white)) baselevels ///
			 xtick(-0.05(0.01)0.05) grid(between glpattern(dash) glcolor(gray)) ///
			 levels(95 90)  ciopts(lwidth(1 ..) lcolor(*.4 *.6 ))

	

	
******************************************************************************	
* TABLES S12 S13
* Avrage marginal effects
******************************************************************************	

	set dp period
	mlogit pam_ward_R6 ib2.citizenship              /// 
					   i.gender                     ///
					   i.first_born                 ///
					   i.homeowner_alo1             ///
					   i.EU_nonEU                ///
					   nbr_children_VO1             ///
					   i.single_parent_VO1          ///
					   i.start_group                ///
					   i.q5_citoscore               ///
					   i.parent_ed_level_miss       ///
					   i.secm_ma_agg2       		///
					   i.secm_pa_agg2       		///
					   ib1.q3_st_disp_income1       ///
					   i.Dutch_home					///
					   i.YSM_ma_cat3                ///
					   i.YSM_pa_cat3                ///
					   , b(6) rrr  

	* TABLE S12
	margins, dydx(citizenship) 	
	* TABLE S13
	margins, dydx(parent_ed_level_miss)		

	
******************************************************************************	
* TABLES S14 S15 S16		       		
* Marginal effects at representative values 
******************************************************************************	

	* TABLE S14
	margins, dydx(citizenship) asobserved at(parent_ed_level_miss=(1 2 3))
	
	* TABLE S15
	margins, dydx(citizenship) asobserved at(YSM_ma_cat3=(1 2 3)) 

	* TABLE S16
	margins, dydx(citizenship) asobserved at(Dutch_home=(0 1)) 



	
